04 October 2019
The Data, Insights and Advisory Solution you need to win in an ecommerce-driven world.
What does it mean?
(Formerly a catch-all role)
library(RSelenium)
# Start a Selenium-driven Firefox instance and load the LinkedIn job search
# results page for one search term in Dublin.
rd <- rsDriver(browser = "firefox", port = 4444L) # Download binaries, start driver
my_session <- rd$client # Create client object
my_session$open() # Open session

# URL-encoded search keywords; `term` selects which one this run scrapes.
search_terms <- c("Data%20Scientist", "Data%20Science", "Statistics", "Statistician")
term <- 1L

# Assemble the URL from single-line pieces so that no line break ends up
# inside the query string.
search_url <- paste0(
  "https://ie.linkedin.com/jobs/search?keywords=", search_terms[term],
  "&location=Dublin%2C%20Ireland",
  "&trk=guest_job_search_jobs-search-bar_search-submit",
  "&redirect=false&position=1&pageNum=0"
)
my_session$navigate(search_url) # Navigate to the page
# Press the "Load more jobs" button up to 20 times, stopping early as soon as
# the button can no longer be found on the page.
for (attempt in seq_len(20)) {
  # findElement() raises an error when nothing matches the selector; turn
  # that into NULL so the missing button simply ends the loop.
  load_btn <- tryCatch(
    my_session$findElement(using = "css selector", ".see-more-jobs"),
    error = function(e) NULL
  )
  if (is.null(load_btn)) {
    break
  }
  load_btn$clickElement()
  # Pause for a random 3 to 5 seconds before looking for the button again.
  Sys.sleep(runif(1, 3, 5))
}
# Save the current page HTML to data/<YYYY_MM_DD>_LI_<term>_Dublin.txt, then
# close the browser session and stop the Selenium server.
page_html <- my_session$getPageSource()[[1]] # Get HTML
output_file <- paste0("data/", format(Sys.time(), "%Y_%m_%d"), "_LI_",
                      gsub("%20", "_", search_terms[term]), "_Dublin.txt")
# writeLines() fails when the target directory is missing, which would lose
# the scraped page; make sure it exists first.
dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
writeLines(page_html, output_file) # Save data
my_session$close() # Close session
rd[["server"]]$stop() # stop driver
# Parse the saved LinkedIn result pages (one file per search term) into a
# single data frame, `job_data`, with one row per job posting and the columns
# Title, Company, Text and SearchTerm.
scrape_date <- "2020_03_01" # Date stamp in the names of the saved pages
listing_xpath <- "/html/body/main/div/section/ul/li" # One <li> per posting

# Text of the first node matching `xpath` below `node`, or NA if none match.
first_text <- function(node, xpath) {
  found <- rvest::html_text(rvest::html_nodes(node, xpath = xpath))
  if (length(found) > 0) found[[1]] else NA_character_
}

# Read the saved page for one search term and return a character matrix with
# one row per posting: title, company, description, search term.
parse_saved_search <- function(jobtitle) {
  page <- xml2::read_html(
    paste0("data/", scrape_date, "_LI_", jobtitle, "_Dublin.txt")
  )
  # Select every posting with one query instead of probing a fixed number of
  # list positions, so pages of any length are parsed completely.
  postings <- rvest::html_nodes(page, xpath = listing_xpath)
  rows <- lapply(postings, function(posting) {
    c(first_text(posting, "a"),           # Title
      first_text(posting, "div[1]/h4/a"), # Company
      first_text(posting, "div[1]/div"),  # Description
      jobtitle)
  })
  if (length(rows) == 0) {
    # Keep the four-column shape even when a page contains no postings.
    return(matrix(character(0), nrow = 0, ncol = 4))
  }
  do.call(rbind, rows) # Combine data for each term
}

job_matrix <- do.call(
  rbind,
  lapply(c("Statistician", "Data_Scientist", "Statistics", "Data_Science"),
         parse_saved_search)
) # Combine the four data sets
job_data <- as.data.frame(job_matrix, stringsAsFactors = FALSE) # Create data frame
job_data <- dplyr::select(job_data,
                          Title = V1, Company = V2,
                          Text = V3, SearchTerm = V4) # Rename variables
job_data <- dplyr::filter(job_data, !is.na(Title)) # Remove postings without a title
| Search Term | Results |
|---|---|
| Statistician | 11 |
| Data_Science | 25 |
| Data_Scientist | 260 |
| Statistics | 444 |
```{r}
# Scatter plot of `cty` against `displ` from `mpg`, coloured by `class`.
ggplot(data = mpg, mapping = aes(x = displ, y = cty, colour = class)) +
  geom_point()
```
```{r, fig.retina = 4, dev.args = list(bg = 'transparent')}
# Scatter plot of `cty` against `displ` from `mpg`, coloured by `class`,
# with the plot background filled transparent and drawn without a border.
ggplot(data = mpg, mapping = aes(x = displ, y = cty, colour = class)) +
  geom_point() +
  theme(
    plot.background = element_rect(fill = "transparent", color = NA)
  )
```